{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "normal_folder = \"/home/biswajit/data/ims/normal/\"\n",
    "inner_folder = \"/home/biswajit/data/ims/inner/\"\n",
    "outer_folder = \"/home/biswajit/data/ims/outer/\"\n",
    "ball_folder = \"/home/biswajit/data/ims/ball/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of normal files:  750\n",
      "Total number of inner_fault files:  750\n",
      "Total number of outer_fault files:  750\n",
      "Total number of ball_fault files:  750\n"
     ]
    }
   ],
   "source": [
    "normal_files = glob.glob(normal_folder + \"/*\")\n",
    "print(\"Total number of normal files: \", len(normal_files))\n",
    "\n",
    "inner_files = glob.glob(inner_folder + \"/*\")\n",
    "print(\"Total number of inner_fault files: \", len(inner_files))\n",
    "\n",
    "outer_files = glob.glob(outer_folder + \"/*\")\n",
    "print(\"Total number of outer_fault files: \", len(outer_files))\n",
    "\n",
    "ball_files = glob.glob(ball_folder + \"/*\")\n",
    "print(\"Total number of ball_fault files: \", len(ball_files))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_files =  normal_files[650:750] + inner_files[650:750] + outer_files[650:750] + ball_files[650:750]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For normal: 550 100\n",
      "For inner: 550 100\n",
      "For outer: 550 100\n",
      "For ball: 550 100\n"
     ]
    }
   ],
   "source": [
    "normal_files_train, normal_files_validation = train_test_split(normal_files[:650], test_size = 100, random_state = 32)\n",
    "inner_files_train, inner_files_validation = train_test_split(inner_files[:650], test_size = 100, random_state = 323)\n",
    "outer_files_train, outer_files_validation = train_test_split(outer_files[:650], test_size = 100, random_state = 123)\n",
    "ball_files_train, ball_files_validation = train_test_split(ball_files[:650], test_size = 100, random_state = 285)\n",
    "\n",
    "print(\"For normal:\", len(normal_files_train), len(normal_files_validation))\n",
    "print(\"For inner:\", len(inner_files_train), len(inner_files_validation))\n",
    "print(\"For outer:\", len(outer_files_train), len(outer_files_validation))\n",
    "print(\"For ball:\", len(outer_files_train), len(outer_files_validation))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total train_files: 2200\n",
      "Total validation_files: 400\n",
      "Total test_files: 400\n"
     ]
    }
   ],
   "source": [
    "train_files = normal_files_train + inner_files_train + outer_files_train + ball_files_train\n",
    "validation_files = normal_files_validation + inner_files_validation + outer_files_validation + ball_files_validation\n",
    "\n",
    "print(\"Total train_files:\", len(train_files))\n",
    "print(\"Total validation_files:\", len(validation_files))\n",
    "print(\"Total test_files:\", len(test_files))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.shuffle(train_files)\n",
    "np.random.shuffle(validation_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import pandas as pd\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tf_data_generator(file_list,  batch_size = 4):\n",
    "    i = 0\n",
    "    while True:\n",
    "        if i*batch_size >= len(file_list):  \n",
    "            i = 0\n",
    "            np.random.shuffle(file_list)\n",
    "        else:\n",
    "            file_chunk = file_list[i*batch_size:(i+1)*batch_size]\n",
    "            data = []\n",
    "            labels = []\n",
    "            patterns = tf.constant([\".*(normal)\", \".*(inner)\", \".*(outer)\", \".*(ball)\"])\n",
    "            for file in file_chunk:\n",
    "                temp = pd.read_csv(open(file,'r'), sep = \"\\s+\", header = None)\n",
    "                fault_columns = [0, 4, 2, 6]              # In this order, 0-normal, 4-inner, 2-outer, 6-ball\n",
    "                num = np.int(np.floor(len(temp[0])/1024)) # As all columns have same number of entries\n",
    "                j = 0\n",
    "                for pattern in patterns:\n",
    "                    if re.match(pattern.numpy(), tf.constant(file).numpy()):\n",
    "                        labels = labels + list(np.repeat(j,num)) \n",
    "                        column_number = fault_columns[j]\n",
    "                        break\n",
    "                    j = j + 1   \n",
    "                data = data + list(temp[column_number][0:num*1024].values.reshape(num,32,32,1))\n",
    "                       \n",
    "            data = np.asarray(data).reshape(-1,32,32,1)\n",
    "            labels = np.asarray(labels)\n",
    "            \n",
    "            # Shuffle data\n",
    "            index = np.random.permutation(len(data))\n",
    "            data, labels = data[index], labels[index]\n",
    "            \n",
    "            yield data, labels\n",
    "            i = i + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 50\n",
    "train_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [train_files, batch_size],\n",
    "                                              output_shapes = ((None, 32, 32, 1), (None,)),\n",
    "                                              output_types = (tf.float32, tf.float32))\n",
    "\n",
    "validation_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [validation_files, batch_size],\n",
    "                                                   output_shapes = ((None, 32, 32, 1), (None,)),\n",
    "                                                   output_types = (tf.float32, tf.float32))\n",
    "\n",
    "test_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [test_files, batch_size],\n",
    "                                             output_shapes = ((None, 32, 32, 1), (None,)),\n",
    "                                             output_types = (tf.float32, tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dataset = train_dataset.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)\n",
    "validation_dataset = validation_dataset.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras import layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "conv2d (Conv2D)              (None, 28, 28, 32)        832       \n",
      "_________________________________________________________________\n",
      "max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         \n",
      "_________________________________________________________________\n",
      "conv2d_1 (Conv2D)            (None, 10, 10, 16)        12816     \n",
      "_________________________________________________________________\n",
      "max_pooling2d_1 (MaxPooling2 (None, 5, 5, 16)          0         \n",
      "_________________________________________________________________\n",
      "flatten (Flatten)            (None, 400)               0         \n",
      "_________________________________________________________________\n",
      "dense (Dense)                (None, 120)               48120     \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 84)                10164     \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 16)                1360      \n",
      "_________________________________________________________________\n",
      "dense_3 (Dense)              (None, 4)                 68        \n",
      "=================================================================\n",
      "Total params: 73,360\n",
      "Trainable params: 73,360\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model = tf.keras.Sequential([\n",
    "    layers.Conv2D(32,5,activation= 'relu', input_shape = (32,32,1)),\n",
    "    layers.MaxPool2D(2),\n",
    "    layers.Conv2D(16,5,activation = 'relu'),\n",
    "    layers.MaxPool2D(2),\n",
    "    layers.Flatten(),\n",
    "    layers.Dense(120,activation = 'relu'),\n",
    "    layers.Dense(84, activation = 'relu'),\n",
    "    layers.Dense(16, activation = 'relu'),\n",
    "    layers.Dense(4, activation = 'softmax')\n",
    "])\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "44.0 8.0 8.0\n"
     ]
    }
   ],
   "source": [
    "steps_per_epoch = np.ceil(len(train_files)/batch_size)\n",
    "validation_steps = np.ceil(len(validation_files)/batch_size)\n",
    "steps = np.ceil(len(test_files)/batch_size)\n",
    "print(steps_per_epoch, validation_steps, steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train for 44.0 steps, validate for 8.0 steps\n",
      "Epoch 1/10\n",
      "44/44 [==============================] - 58s 1s/step - loss: 0.9204 - accuracy: 0.5699 - val_loss: 0.7856 - val_accuracy: 0.5648\n",
      "Epoch 2/10\n",
      "44/44 [==============================] - 52s 1s/step - loss: 0.6047 - accuracy: 0.7085 - val_loss: 0.5611 - val_accuracy: 0.7393\n",
      "Epoch 3/10\n",
      "44/44 [==============================] - 51s 1s/step - loss: 0.5143 - accuracy: 0.7551 - val_loss: 0.4582 - val_accuracy: 0.7819\n",
      "Epoch 4/10\n",
      "44/44 [==============================] - 46s 1s/step - loss: 0.4301 - accuracy: 0.7948 - val_loss: 0.3974 - val_accuracy: 0.8119\n",
      "Epoch 5/10\n",
      "44/44 [==============================] - 48s 1s/step - loss: 0.3067 - accuracy: 0.8745 - val_loss: 0.1917 - val_accuracy: 0.9639\n",
      "Epoch 6/10\n",
      "44/44 [==============================] - 49s 1s/step - loss: 0.1240 - accuracy: 0.9647 - val_loss: 0.0648 - val_accuracy: 0.9880\n",
      "Epoch 7/10\n",
      "44/44 [==============================] - 47s 1s/step - loss: 0.0511 - accuracy: 0.9890 - val_loss: 0.0435 - val_accuracy: 0.9886\n",
      "Epoch 8/10\n",
      "44/44 [==============================] - 48s 1s/step - loss: 0.0483 - accuracy: 0.9843 - val_loss: 0.0222 - val_accuracy: 0.9956\n",
      "Epoch 9/10\n",
      "44/44 [==============================] - 47s 1s/step - loss: 0.0407 - accuracy: 0.9860 - val_loss: 0.0933 - val_accuracy: 0.9634\n",
      "Epoch 10/10\n",
      "44/44 [==============================] - 48s 1s/step - loss: 0.0303 - accuracy: 0.9902 - val_loss: 0.0156 - val_accuracy: 0.9952\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7f838c152810>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.compile(loss = 'sparse_categorical_crossentropy', optimizer = \"adam\", metrics = [\"accuracy\"])\n",
    "model.fit(train_dataset, validation_data = validation_dataset, \n",
    "         steps_per_epoch= steps_per_epoch,\n",
    "         validation_steps = validation_steps,\n",
    "          epochs = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8/8 [==============================] - 8s 984ms/step - loss: 0.0602 - accuracy: 0.9934\n"
     ]
    }
   ],
   "source": [
    "test_loss, test_acc = model.evaluate(test_dataset, steps = steps)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally model can be saved using following command."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.save(\"IAI_IMS_final.h5\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We have commented the above line as different iterations may lead to different accuracies. We had saved a model with all its parameters when the test accuracy was 99.84%. Readers can run this notebook several times for different epochs and save their best model."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow_gpu_master",
   "language": "python",
   "name": "tensorflow_gpu_master"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
